import plotly.offline as pyo
from plotly.graph_objs import *
import chart_studio.plotly as py
import pandas as pd
from pandas import DataFrame
import random
# Enable inline Plotly rendering for this notebook. `pyo` is already
# `plotly.offline`, so call its public function directly instead of reaching
# through the `plotly.offline.offline` submodule.
pyo.init_notebook_mode()

# Load the emissions table; index_col=0 makes the first CSV column the row index.
emissions = pd.read_csv(r"../Data/TotalCo2EmissionsByCountry.csv", index_col=0)
# Preview the first rows of the table.
emissions.head()
| Year | Afghanistan | AFG | Albania | ALB | Algeria | DZA | American Samoa | ASM | Andorra | AND | Angola | AGO | Antigua and Barbuda | ATG | Arab World | ARB | Argentina | ARG | ... | Uzbekistan | UZB | Vanuatu | VUT | Venezuela, RB | VEN | Vietnam | VNM | Virgin Islands (U.S.) | VIR | West Bank and Gaza | PSE | World | WLD | Yemen, Rep. | YEM | Zambia | ZMB | Zimbabwe | ZWE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1960 | 414.371 | 2024.184 | 6160.560 | NaN | NaN | 550.050 | 36.670 | 59563.98922 | 48815.104 | ... | NaN | NaN | 57069.521 | 7491.681 | NaN | NaN | 9.396706e+06 | 3633.997 | NaN | NaN |
| 1 | 1961 | 491.378 | 2280.874 | 6065.218 | NaN | NaN | 454.708 | 47.671 | 65151.09581 | 51180.319 | ... | NaN | NaN | 51928.387 | 7986.726 | NaN | NaN | 9.434403e+06 | 2665.909 | NaN | NaN |
| 2 | 1962 | 689.396 | 2464.224 | 5669.182 | NaN | NaN | 1180.774 | 102.676 | 74357.70773 | 53695.881 | ... | NaN | 40.337 | 54106.585 | 9347.183 | NaN | NaN | 9.818840e+06 | 3887.020 | NaN | NaN |
| 3 | 1963 | 707.731 | 2082.856 | 5427.160 | NaN | NaN | 1151.438 | 84.341 | 87895.97916 | 50083.886 | ... | NaN | 33.003 | 56204.109 | 9119.829 | NaN | NaN | 1.035575e+07 | 2918.932 | NaN | NaN |
| 4 | 1964 | 839.743 | 2016.850 | 5650.847 | NaN | NaN | 1224.778 | 91.675 | 103196.28160 | 55727.399 | ... | NaN | 62.339 | 56603.812 | 11800.406 | NaN | NaN | 1.094701e+07 | 3633.997 | 3278.298 | 4473.74 |
5 rows × 249 columns
def createStackedPropArea(df, time, cols, title, yaxisTitle):
    """
    Draw a stacked proportional area plot with Plotly.

    For every row of ``df`` each selected column is converted to its share of
    the row total (the sum over ``cols``). The shares are then accumulated
    from the first column to the last so that each trace can be filled down
    to the previous one. The figure is rendered with ``pyo.iplot``; the
    function returns nothing.

    Takes five arguments:
    df - a pandas DataFrame
    time - the time element of the data, must be a column in the DataFrame as a string
    cols - the names of the columns in the DataFrame to include in the area plot, as a list
    title - the title of the chart
    yaxisTitle - the yaxis title of the chart (the xaxis title comes from the time variable)
    """
    # A repeated name (or the time column itself) would make df[name] return
    # a DataFrame instead of a Series and break the share computation, so
    # keep each column once, in first-seen order.
    cols = [col for col in dict.fromkeys(cols) if col != time]

    # fillna returns a new frame, so the caller's DataFrame is never modified
    # and no chained-assignment warning is triggered. Missing values count as
    # zero emissions.
    stackedAreaDF = df.loc[:, [time] + cols].fillna(0)

    # Share of the row total per column, then running sum across columns.
    # Rows whose total is 0 yield NaN (0/0) and show up as gaps in the plot.
    rowTotals = stackedAreaDF[cols].sum(axis=1)
    stackedPCAreaDF = stackedAreaDF[cols].div(rowTotals, axis=0).cumsum(axis=1)

    traces = [
        {
            'type': 'scatter',
            'x': stackedAreaDF[time],
            'y': stackedPCAreaDF[col],
            # Labels look like "Name | CODE"; show only the name part. A
            # label without the separator is kept whole rather than being
            # truncated by a fixed-width slice.
            'name': str(col).rsplit(' | ', 1)[0],
            'mode': 'lines',
            'fill': 'tonexty',
        }
        for col in cols
    ]

    layout = {
        'title': title,
        'xaxis': {'title': time},
        'yaxis': {'title': yaxisTitle},
    }
    # A plain list of traces replaces the deprecated graph_objs.Data wrapper.
    fig = Figure(data=traces, layout=layout)
    pyo.iplot(fig)
# createStackedPropArea renders the chart itself and has no return statement,
# so `test` is always None and the bare `test` expression displays nothing.
test = createStackedPropArea(
    emissions,
    'Year',
    [
        'United Arab Emirates | ARE',
        'United Kingdom | GBR',
        'United States | USA',
        'China | CHN',
        'India | IND',
    ],
    "Proportion of Co2 Emissions, 1960-2015",
    'Proportion of Co2 Emissions',
)
test
# Same five countries as the previous chart, listed in a different order;
# the list order is the order in which the areas are stacked.
createStackedPropArea(
    emissions,
    'Year',
    [
        'China | CHN',
        'United States | USA',
        'India | IND',
        'United Arab Emirates | ARE',
        'United Kingdom | GBR',
    ],
    "Proportion of Co2 Emissions, 1960-2015",
    'Proportion of Co2 Emissions',
)
# Pick up to ten distinct data columns at random. random.sample draws without
# replacement, so no column is chosen twice, and the 'Year' column is excluded
# because it is the time axis rather than an emissions series.
candidateColumns = [col for col in emissions.columns.tolist() if col != 'Year']
countries = random.sample(candidateColumns, min(10, len(candidateColumns)))
countries
['Poland | POL', 'Cambodia | KHM', 'Qatar | QAT', 'Afghanistan | AFG', 'Aruba | ABW', 'Sub-Saharan Africa (developing only) | SSA', 'Upper middle income | UMC', 'St. Kitts and Nevis | KNA', 'Malaysia | MYS', 'Yemen, Rep. | YEM']
# Chart the randomly selected columns.
createStackedPropArea(
    emissions,
    'Year',
    countries,
    "Proportion of Co2 Emissions, 1960-2015",
    'Proportion of Co2 Emissions',
)